import jsonlines
import json
from tqdm import tqdm
from datasets import load_dataset

# Module-level list kept from the original script; it is not read or appended
# to anywhere in the visible code.
new_datas = []

# Disabled alternative inputs, kept for reference:
#with open("ultrachat_700k.jsonl") as f:
#with jsonlines.open("Wizard/WizardLM_evol_instruct_V2_143k.json") as f:
#    datas = json.load(f)

# Source records: the "train" split of the Magpie-llama3.1-pro-mt dataset.
datas = load_dataset("Magpie-llama3.1-pro-mt", split="train")

# Copy every usable record into a JSON Lines file, trimming each conversation
# at its first blank turn.
with jsonlines.open("Magpie-llama3_1-pro-mt_fastchat.jsonl", "w") as sink:
    for record in tqdm(datas):
        turns = record["conversations"]

        # Position of the first turn whose "value" is empty or whitespace-only;
        # when no turn is blank the whole conversation is kept.
        cutoff = next(
            (
                idx
                for idx, turn in enumerate(turns)
                if turn["value"].strip() == ""
            ),
            len(turns),
        )

        # Only keep records that still have at least two turns after trimming.
        if cutoff >= 2:
            record["conversations"] = turns[:cutoff]
            sink.write(record)
